
* Quality-minus-junk (QMJ) characteristic, part 1: profitability component.
* Loads the merged CRSP-Compustat panel, builds six profitability measures,
* converts each to a within-month z-score of its rank, and z-scores the sum.

use "~/Dropbox/Research/Data/CRSP-Compustat/merged_crsp_compustat.dta", clear

* discard observations dated before 195306
quietly drop if yyyymm<195306

* gross profits / assets, income / book equity (positive book equity only), income / assets
quietly generate qmj_prof1 = (sale - cogs) / at if !missing(sale, cogs, at)
quietly generate qmj_prof2 = ib / be if be>0 & !missing(ib, be)
quietly generate qmj_prof3 = ib / at if !missing(ib, at)

* working capital, current and lagged (wc and L1wc are used again further down the script)
quietly generate wc = act - lct - che + dlc + txp
quietly generate L1wc = L1act - L1lct - L1che + L1dlc + L1txp

* cash flow / assets, gross margin (positive sales only), minus accruals / assets
quietly generate qmj_prof4 = (ib + dp - (wc - L1wc) - capx) / at if !missing(ib, dp, wc, L1wc, capx, at)
quietly generate qmj_prof5 = (sale - cogs) / sale if sale>0 & !missing(sale, cogs)
quietly generate qmj_prof6 = -((wc - L1wc) - dp) / at if !missing(wc, L1wc, dp, at)

* number of profitability measures available for each observation
egen ncount_prof = rownonmiss(qmj_prof?)

* per-measure z-score of the within-month rank; unavailable z-scores are set to 0
foreach j of numlist 1/6 {
	tempvar rk mu sigma
	quietly by yyyymm, sort: egen `rk' = rank(qmj_prof`j') if !missing(qmj_prof`j')
	quietly by yyyymm, sort: egen `mu' = mean(`rk')
	quietly by yyyymm, sort: egen `sigma' = sd(`rk')
	quietly generate qmj_profz`j' = (`rk' - `mu') / `sigma'
	quietly replace qmj_profz`j' = 0 if qmj_profz`j'>=.
	drop `rk' `mu' `sigma'
	}

* sum the six z-scores (only where at least one measure exists), then z-score the rank of the sum
generate qmj_profzsum = qmj_profz1 + qmj_profz2 + qmj_profz3 + qmj_profz4 + qmj_profz5 + qmj_profz6 if ncount_prof>0 & ncount_prof<.
tempvar rk mu sigma
quietly by yyyymm, sort: egen `rk' = rank(qmj_profzsum)
quietly by yyyymm, sort: egen `mu' = mean(`rk')
quietly by yyyymm, sort: egen `sigma' = sd(`rk')
quietly generate qmj_profitability = (`rk' - `mu') / `sigma'
drop `rk' `mu' `sigma' qmj_prof? qmj_profz? qmj_profzsum


* component 2: quality (five-year growth in profitability)
*
* Each measure is the change in a per-share profitability quantity between now
* and 60 periods ago, scaled by the per-share base from 60 periods ago.
* The panel is xtset on permno/dateindex; dateindex is treated as a monthly
* index (it is rebuilt as (year-1900)*12+month later in this file), so
* l12 / l60 / l72 are lags of one / five / six years.
*
* FIX: the denominators of qmj_qual1-qmj_qual4 used l5. (a 5-period lag).
* The five-year base is l60., consistent with qmj_qual5 below.
*
* NOTE(review): the lagged-base terms (l12.adj_at, l72.adj_at, ...) are subtracted
* in full, without any rate multiplier - confirm against the intended definition.
 
* all quantities are per share split-adjusted
gen adj_at=at/adj_shares
gen adj_gp=(sale-cogs)/adj_shares
gen adj_be=be/adj_shares
gen adj_ib=ib/adj_shares
gen adj_cf= (ib + dp - (wc - L1wc) - capx)/adj_shares
gen adj_sale=sale/adj_shares

xtset permno dateindex
 
qui gen qmj_qual1 = ((adj_gp - l12.adj_at)-(l60.adj_gp - l72.adj_at))/l60.adj_at
qui gen qmj_qual2 = ((adj_ib -l12.adj_be)- (l60.adj_ib - l72.adj_be))/l60.adj_be
qui gen qmj_qual3 = ((adj_ib -l12.adj_at)- (l60.adj_ib - l72.adj_at))/l60.adj_at
qui gen qmj_qual4 = ((adj_cf -l12.adj_at)- (l60.adj_cf - l72.adj_at))/l60.adj_at
qui gen qmj_qual5 = (adj_gp -l60.adj_gp)/l60.adj_sale

* how many nonmissing?
egen ncount_qual = rownonmiss(qmj_qual?)

* per-measure z-score of the within-month rank
* NOTE(review): unlike the profitability loop, missing z-scores are not set to 0 here,
* so the sum below is missing unless all five measures are available.
forvalues j=1/5 {
	qui bysort yyyymm: egen r = rank(qmj_qual`j') if ~missing(qmj_qual`j')
	qui bysort yyyymm: egen m = mean(r)
	qui bysort yyyymm: egen sd = sd(r)
	qui gen qmj_qualz`j' = (r - m) / sd
	drop r m sd
	}

* z-score over the sum
gen qmj_qualzsum = qmj_qualz1 + qmj_qualz2 + qmj_qualz3 + qmj_qualz4 + qmj_qualz5 if ncount_qual>0 & ~missing(ncount_qual)
qui bysort yyyymm: egen r = rank(qmj_qualzsum) 
qui bysort yyyymm: egen m = mean(r)
qui bysort yyyymm: egen sd = sd(r)
qui gen qmj_quality = (r - m) / sd
drop r m sd qmj_qual? qmj_qualz? qmj_qualzsum 

	

* component 3: safety
*
* Four measures, all oriented so that a HIGHER value means a SAFER firm:
*   qmj_safety1 = minus leverage
*   qmj_safety2 = minus Ohlson's O-score
*   qmj_safety3 = Altman-style Z-score
*   qmj_safety4 = minus the standard deviation of ROE over the current and four lagged years
*
* FIXES relative to the previous version:
*   - the O-score omitted the -2.37*nita term although nita was computed
*     (the same term appears in a24_oscore later in this file);
*   - the O-score (a distress measure) and ROE volatility entered with a
*     positive sign, i.e. riskier firms were ranked as safer.

* inputs to the O-score; the asset base is at + 0.1*(December_me - be)
gen tlta=(dlc+dltt)/(at+0.1*(December_me-be))
gen wcta=(act-lct)/(at+0.1*(December_me-be))
gen clca=lct/act
* NOTE: a missing lt compares as larger than at, so oeneg is 1 in that case;
* futl is then missing and the O-score is missing anyway
gen oeneg=1 if lt>at
replace oeneg=0 if missing(oeneg)
gen nita=ib/at
gen futl=pi/lt
gen intwo=1 if (ib<0 & L1ib<0)
replace intwo=0 if missing(intwo)
gen chin=(ib-L1ib)/(abs(ib)+abs(L1ib))
gen roe=ib/be

gen qmj_safety1=-(dltt+dlc+mibt+pstk)/at
gen qmj_safety2=-(-1.32-0.407*ln(at+0.1*(December_me-be))+6.03*tlta-1.43*wcta+0.076*clca-1.72*oeneg-2.37*nita-1.83*futl+0.285*intwo-0.521*chin)
gen qmj_safety3=(1.2*wc+1.4*re+3.3*ebit+0.6*December_me+sale)/at

* ROE in each of the four preceding years (12-period lags of the panel)
xtset permno dateindex
gen L1roe = l12.roe
gen L2roe = l24.roe
gen L3roe = l36.roe
gen L4roe = l48.roe

egen roevol = rowsd(roe L1roe L2roe L3roe L4roe)
gen qmj_safety4 = -roevol
drop *roe roevol

* how many nonmissing?
egen ncount_safety = rownonmiss(qmj_safety?)

* per-measure z-score of the within-month rank; ranks are computed only for
* observations where all four safety measures are available
forvalues j=1/4 {
	qui bysort yyyymm: egen r = rank(qmj_safety`j') if ~missing(qmj_safety1,qmj_safety2,qmj_safety3,qmj_safety4)
	qui bysort yyyymm: egen m = mean(r)
	qui bysort yyyymm: egen sd = sd(r)
	qui gen qmj_safetyz`j' = (r - m) / sd
	drop r m sd
	}

* z-score over the sum
gen qmj_safetyzsum = qmj_safetyz1 + qmj_safetyz2 + qmj_safetyz3 + qmj_safetyz4 if ncount_safety>0 & ~missing(ncount_safety)
qui bysort yyyymm: egen r = rank(qmj_safetyzsum) 
qui bysort yyyymm: egen m = mean(r)
qui bysort yyyymm: egen sd = sd(r)
qui gen qmj_safety = (r - m) / sd
drop r m sd qmj_safety? qmj_safetyz? qmj_safetyzsum 


* Combine the three component z-scores into the final QMJ score.
* A missing component is treated as 0 (the cross-sectional mean of a z-score).
* FIX: the quality component is referenced by its full name qmj_quality; the
* previous abbreviation qmj_qual only worked with variable abbreviation enabled.
replace qmj_profitability = 0 if missing(qmj_profitability)
replace qmj_quality = 0 if missing(qmj_quality)
replace qmj_safety = 0 if missing(qmj_safety)

* sanity check: display how many observations have a missing component count
count if missing(ncount_prof)		
count if missing(ncount_qual)		
count if missing(ncount_safety)		

* sum the components wherever at least one underlying measure exists, then
* z-score the within-month rank of the sum
gen qmjsum = qmj_profitability + qmj_quality + qmj_safety if ncount_prof>0 | ncount_qual>0 | ncount_safety>0
qui bysort yyyymm: egen r = rank(qmjsum) 
qui bysort yyyymm: egen m = mean(r)
qui bysort yyyymm: egen sd = sd(r)
qui gen qmj = (r - m) / sd

* keep only the identifiers and the final score
keep permno yyyymm qmj
keep if ~missing(qmj)
compress

sort permno yyyymm

save "~/Documents/Temp/qmj.dta", replace
	
	
	
	
*===============================================================================
*
* ACCOUNTING-BASED ANOMALIES
*
*===============================================================================

* Accounting-based anomalies: start again from the merged CRSP-Compustat panel
use "~/Dropbox/Research/Data/CRSP-Compustat/merged_crsp_compustat.dta", clear

* retain the June observation of every year
keep if mod(yyyymm,100)==6

* two- and three-digit SIC codes, defined only for codes strictly between 0 and 9999
generate sic2digit = int(siccd/100) if siccd<9999 & siccd>0
generate sic3digit = int(siccd/10) if siccd<9999 & siccd>0

* total assets of exactly zero (current and lags 1-6) are treated as missing
foreach v of varlist at L1at L2at L3at L4at L5at L6at {
	replace `v' = . if `v'==0
	}

* accruals: change in non-cash current assets, minus change in current liabilities
* net of short-term debt and taxes payable, minus depreciation
generate accruals = ((act - L1act) - (che - L1che)) - ((lct - L1lct) - (dlc - L1dlc) - (txp - L1txp)) - dp

* accruals scaled by average total assets
generate a1_accruals = accruals / (0.5 * (at + L1at))

* growth in total assets
generate a2_assetgrowth = at / L1at - 1 if at<.

* book-to-market, positive book and December market equity only
generate a4_booktomarket = be / December_me if be>0 & December_me>0 & !missing(be, December_me)

* Cash flow to market equity.
*
* Cash flow is income before extraordinary items (Compustat item IB) plus
* equity's share of depreciation (item DP) plus deferred taxes (item TXDI,
* if available; a missing TXDI contributes zero).
*
* Equity's share is defined as market equity (price times shares outstanding
* from CRSP) divided by total assets (item AT) minus book equity plus market
* equity, i.e. December_me / (at - be + December_me).
*
* FIX: the share was previously computed as December_me / (at - be), which
* omits market equity from the denominator.  The sample restriction
* (at - be > 0) is unchanged.

gen a5_cashflowtoequity = (ib + dp * December_me / (at - be + December_me) + cond(missing(txdi), 0, txdi)) / December_me if at - be > 0


* a6: change in asset turnover (sales / assets), each year's turnover capped at 5
generate saleat = sale/at if sale>0
generate L1saleat = L1sale/L1at if L1sale>0

foreach ratio of varlist saleat L1saleat {
	quietly replace `ratio' = 5 if `ratio'>5 & `ratio'<.
	}

generate a6_aturnoverchg = saleat - L1saleat

* from here on, missing short- and long-term debt (current and lagged) count as zero
foreach debt of varlist dlc dltt L1dlc L1dltt {
	quietly replace `debt' = 0 if `debt'>=.
	}

* a9: +1 when long-term debt was issued, -1 when it was not, missing when dltis is missing
generate a9_debtissuance = (dltis > 0) if dltis<.

replace a9_debtissuance = -1 if !a9_debtissuance

* a10: earnings-to-price, bounded to [-2, 1]
generate a10_earningsprice = ib / December_me if December_me>0 & December_me<.

foreach ep of varlist a10* {
	quietly replace `ep' = -2 if `ep'<-2
	quietly replace `ep' = 1 if `ep'>1 & `ep'<.
	}

* missing preferred stock counts as zero
replace pstkrv = 0 if pstkrv>=.

* a13: enterprise value over operating income; requires both above 0.01 and positive book equity; capped at 100
generate a13_enterprisemultiple = (me + dlc + dltt + pstkrv - che) / oibdp if be>0 & !missing(be) & oibdp>1e-2 & me + dlc + dltt + pstkrv - che>1e-2

replace a13_enterprisemultiple = 100 if a13_enterprisemultiple>100 & a13_enterprisemultiple<.

* a14: gross profits over assets, positive sales only
generate a14_grossprofitability = (sale - cogs) / at if sale>0 & !missing(sale, at)

* a15: change in inventory over average assets, non-negative inventories only
generate a15_inventorygrowth = (invt - L1invt) / (0.5 * (at + L1at)) if invt>=0 & L1invt>=0 & !missing(invt, L1invt, at, L1at)



* Hou and Robinson's Herfindahl index
* Built on a temporary copy of the data (preserve/restore): for every
* three-digit SIC industry and year, sum the squared sales shares of its firms,
* average that sum over the current and two preceding years, save the result
* to a temporary file and merge it back onto the firm-level data.
preserve

* Regulated industries include railroads (SIC code 4011) through 1980, trucking (4210 and 4213) through 1980, airlines (4512) through 1978, telecommunications (4812 and 4813) through 1982, and gas and electric utilities (4900 to 4939).
* NOTE(review): the trucking condition below covers the whole range 4210-4213
* (i.e. also 4211 and 4212), which is wider than the two codes listed above - confirm.
gen regindustry = 1 if (siccd==4011 & yyyymm<=198106) | (siccd>=4210 & siccd<=4213 & yyyymm<=198106) | (siccd==4512 & yyyymm<=197906) | (siccd>=4812 & siccd<=4813 & yyyymm<=198306) | (siccd>=4900 & siccd<=4939)
drop if regindustry==1

keep yyyymm sic3digit sale

* industry-year total of positive sales, and each firm's squared share of it
bysort yyyymm sic3digit: egen salesum = sum(sale) if ~missing(sale) & sale>0
gen double si2 = (sale / salesum)^2 if ~missing(sale) & sale>0

* one row per industry-year: the sum of squared shares
collapse (sum) si2, by(yyyymm sic3digit)

* a sum of exactly zero means no usable sales in that cell; treat it as missing
replace si2 = . if si2==0

gen long yyyy = trunc(yyyymm/100)

* fill gaps in the yearly panel so that the positional lags below ([_n-1], [_n-2])
* refer to the previous one and two calendar years
xtset sic3digit yyyy
tsfill

sort sic3digit yyyy
gen L1_si2 = si2[_n-1] if sic3digit==sic3digit[_n-1]
sort sic3digit yyyy
gen L2_si2 = si2[_n-2] if sic3digit==sic3digit[_n-2]

* average over whichever of the three years are available
egen a26_herfindahl = rowmean(si2 L1_si2 L2_si2)

* rows added by tsfill have no yyyymm; remove them again
drop if missing(yyyymm)

keep sic3digit yyyymm a26*
qui save "~/Documents/Temp/herfindahl.dta", replace
restore

* attach the industry-level index to every firm; unmatched industry rows are discarded
merge m:1 sic3digit yyyymm using "~/Documents/Temp/herfindahl.dta", nogenerate keep(1 3)

* Net operating assets: (operating assets - operating liabilities) / lagged assets,
* with missing long-term debt counted as zero
foreach var of varlist dltt {
	qui gen tmp_`var' = `var'
	qui replace tmp_`var' = 0 if missing(tmp_`var')
	}
	
gen a21_netoperatingassets  = ((at - che) - (at - dlc - tmp_dltt - be)) / L1at

*===============================================================================
* Components of Piotroski's F-Score
*===============================================================================
* Each f# is a 0/1 indicator and is left missing when its inputs are unavailable,
* so the sum a17_piotroskiF is defined only when all nine signals are.
*
* FIX: Stata treats a missing value as larger than any number in comparisons.
*   f6 - act/lct is missing when lct is 0; the ratios themselves are now required
*        to be non-missing;
*   f7 - had no missing-value guard at all (a missing lagged share count gave 1,
*        a missing current share count gave 0);
*   f8 - the gross margin is missing when sales are 0 or cogs is missing; sales
*        must now be strictly positive and both margins non-missing.
gen f1 = ib>0 if ~missing(ib)
gen f2 = oibdp>0 if ~missing(oibdp)
gen f3 = ib / at >= L1ib / L1at if ~missing(ib) & ~missing(at) & ~missing(L1ib) & ~missing(L1at) 	
gen f4 = accruals<=0 if ~missing(accruals)
gen f5 = dltt/at<=L1dltt/L1at if ~missing(dltt) & ~missing(at) & ~missing(L1dltt) & ~missing(L1at)
gen f6 = act/lct >= L1act/L1lct if ~missing(act/lct, L1act/L1lct)
gen f7 = adj_shares<=L1adj_shares if ~missing(adj_shares, L1adj_shares)
gen f8 = (sale - cogs)/sale > (L1sale - L1cogs)/L1sale if sale>0 & L1sale>0 & ~missing((sale - cogs)/sale, (L1sale - L1cogs)/L1sale)
gen f9 = sale/at > L1sale/L1at if ~missing(sale) & sale>=0 & ~missing(at) & ~missing(L1sale) & L1sale>=0 & ~missing(L1at) 
	
gen a17_piotroskiF  = f1 + f2 + f3 + f4 + f5 + f6 + f7 + f8 + f9


* capital expenditures over sales, for the current year and three lags
foreach lag in "" L1 L2 L3 {
	generate `lag'capxsale = `lag'capx/`lag'sale
	}

* bound every capex/sales ratio to [0, 3]; missing values stay missing
foreach ratio of varlist *capxsale* {
	quietly replace `ratio' = min(max(`ratio', 0), 3) if `ratio'<.
	}

* a18: current ratio relative to the average of the three lagged ratios
* (only when that average is positive), capped at 5
local lagavg "(1/3)*(L1capxsale + L2capxsale + L3capxsale)"
generate a18_abnormalinvestment = capxsale / (`lagavg') if `lagavg'>0

quietly replace a18_abnormalinvestment = 5 if a18_abnormalinvestment>5 & a18_abnormalinvestment<.

* a19: long-term debt over December market equity
generate a19_leverage = dltt / December_me

*=====================================================================
* Accruals / book-to-market composite, standardised with NYSE moments
*=====================================================================
* NYSE-only copies (exchcd==1) of accruals and book-to-market, restricted to
* observations where me and both characteristics are available
local nysesample "exchcd==1 & !missing(me, a1_accruals, a4_booktomarket)"
generate double nyse_accruals = a1_accruals if `nysesample'
generate double nyse_booktomarket = a4_booktomarket if `nysesample'

* within-month mean and standard deviation of the NYSE copies
tempvar acc_mu acc_sd bm_mu bm_sd
by yyyymm, sort: egen `acc_mu' = mean(nyse_accruals)
by yyyymm, sort: egen `acc_sd' = sd(nyse_accruals)
by yyyymm, sort: egen `bm_mu' = mean(nyse_booktomarket)
by yyyymm, sort: egen `bm_sd' = sd(nyse_booktomarket)

* a20: standardised book-to-market minus standardised accruals
quietly generate a20_accruals_beme = (a4_booktomarket - `bm_mu') / `bm_sd' - (a1_accruals - `acc_mu') / `acc_sd'
drop `acc_mu' `acc_sd' `bm_mu' `bm_sd'

* a22: change in net working capital over assets, capped at 1
generate a22_netwcap_changes = (((act - che) - (lct - dlc)) - ((L1act - L1che) - (L1lct - L1dlc))) / at

quietly replace a22_netwcap_changes = 1 if a22_netwcap_changes>1 & a22_netwcap_changes<.

* a24: O-score, requiring all inputs and non-negative total liabilities
generate a24_oscore = -1.32 - 0.407 * ln(at) + 6.03 * lt / at - 1.43 * (act - lct) / at + 0.076 * lct / act - 1.72 * (lt > at) - 2.37 * ib / at - 1.83 * oiadp / lt + 0.285 * (ib < 0 & L1ib < 0) - 0.521 * (ib - L1ib) / (abs(ib) + abs(L1ib)) if lt>=0 & !missing(at, lt, act, lct, ib, oiadp, L1ib)

* a25: operating income over sales, positive sales only
generate a25_profitmargin = oiadp / sale if sale>0 & !missing(oiadp, sale)

* a62: operating income over assets
generate a62_profitability = oiadp / at if oiadp<.

* a64: income over book equity
generate a64_returnonequity = ib / be


*===============================================================================
* Sales growth rank, sales-to-price, share issuance, sustainable growth
*===============================================================================
* gross sales growth for each of the last five years: salesgrowth`k' compares
* sales k-1 years back with sales k years back; both must be positive
local recent "sale"
forvalues k=1/5 {
	generate salesgrowth`k' = `recent' / L`k'sale if !missing(`recent', L`k'sale) & `recent'>0 & L`k'sale>0
	local recent "L`k'sale"
	}

* within-month percentile rank of each year's growth, computed only for firms
* with all five growth rates available
forvalues k=1/5 {
	tempvar rk top
	quietly by yyyymm, sort: egen `rk' = rank(salesgrowth`k') if !missing(salesgrowth1, salesgrowth2, salesgrowth3, salesgrowth4, salesgrowth5)
	quietly by yyyymm, sort: egen `top' = max(`rk')
	quietly generate pranksales`k' = (`rk' - 1) / (`top' - 1)
	drop `rk' `top'
	}

* a29: weighted rank, the most recent year carrying the largest weight
generate a29_salesgrowth = 5 * pranksales1 + 4 * pranksales2 + 3 * pranksales3 + 2 * pranksales4 + 1 * pranksales5

drop salesgrowth? pranksales?

* a30: sales over December market equity, capped at 50
generate a30_salestoprice = sale / December_me if sale>=0 & December_me>0 & !missing(sale, December_me)

foreach sp of varlist a30* {
	quietly replace `sp' = 50 if `sp'>50 & `sp'<.
	}

* log growth in split-adjusted shares over one and five years (positive share counts only)
generate shareissuance1 = ln(adj_shares / L1adj_shares) if adj_shares>0 & L1adj_shares>0 & !missing(adj_shares, L1adj_shares)
generate shareissuance5 = ln(adj_shares / L5adj_shares) if adj_shares>0 & L5adj_shares>0 & !missing(adj_shares, L5adj_shares)

* a31 / a32: sign of the share growth (+1 issuance, 0 unchanged, -1 repurchase, missing if unavailable)
generate a31_shareissuance1 = sign(shareissuance1)
generate a32_shareissuance5 = sign(shareissuance5)

* a33: growth in book equity, positive book equity in both years
generate a33_sustainablegrowth = be / L1be - 1 if be>0 & L1be>0 & !missing(be, L1be)

* a35: total external financing over assets (missing dlcch counts as zero)
* verify units
replace dlcch = 0 if dlcch>=.
generate a35_total_xfin = (sstk - prstkc + dltis - dltr + dlcch - dv) / at

* a36: Z-score, requiring all inputs and positive sales
generate a36_zscore = 1.2 * (act - lct) / at + 1.4 * (re / at) + 3.3 * ((ni + xint + txp) / at) + 0.6 * (December_me  / lt) + 1.0 * (sale / at) if sale>0 & !missing(act, lct, re, at, ni, xint, txp, December_me, lt, sale)

*===============================================================================
* capex growth relative to the two-digit SIC industry
*===============================================================================
* growth of capex over its average in the two preceding years (average must
* exceed 0.01), bounded to [-1, 5]
local capxbase "0.5 * (L1capx + L2capx)"
generate dcapx = (capx - `capxbase') / (`capxbase') if `capxbase' > 1e-2 & !missing(capx, L1capx, L2capx)
replace dcapx = -1 if dcapx<-1
replace dcapx = 5 if dcapx>5 & dcapx<.
by yyyymm sic2digit, sort: egen avg_dcapx = mean(dcapx)

* a37: firm capex growth minus the industry-month average
generate a37_indadjcapxgrowth = dcapx - avg_dcapx

* a38: sales growth minus inventory growth, each relative to its two-year lagged average
local salebase "0.5 * (L1sale + L2sale)"
local invtbase "0.5 * (L1invt + L2invt)"
generate a38_salesminusinventory = (sale - `salebase') / (`salebase') - (invt - `invtbase') / (`invtbase') if !missing(sale, L1sale, L2sale, invt, L1invt, L2invt)

* a40: capex over lagged net PP&E (positive only)
generate a40_investmenttocapital = capx / L1ppent if L1ppent>0 & !missing(capx, L1ppent)

* a41: capex growth (positive capex, lagged capex above 0.01), capped at 10
generate a41_invgrowthrate = capx / L1capx - 1 if capx>0 & L1capx>1e-2 & !missing(capx, L1capx)

replace a41_invgrowthrate = 10 if a41_invgrowthrate>10 & a41_invgrowthrate<.

* a42: change in net PP&E plus change in inventories, over lagged assets
generate a42_investmenttoassets = (ppent - L1ppent + invt - L1invt) / L1at if !missing(ppent, L1ppent, invt, L1invt, L1at)

* a43: profitability, the average of six within-month rank z-scores
* (gross profits/assets, ROE, ROA, cash flow/assets, gross margin, minus accruals).
* Ranks are computed only for firms with all six measures available.
*
* FIXES:
*   - cash flow over assets now starts from ib, consistent with the monthly
*     profitability measure built earlier in this file (it used ni here);
*   - the six measures are referenced by their full names instead of relying on
*     variable abbreviation of qmj_prof1 ... qmj_prof6.
qui gen qmj_prof1_gpoa = (sale - cogs) / at if ~missing(sale,cogs,at) 
qui gen qmj_prof2_roe = ib / be if ~missing(ib,be) & be>0
qui gen qmj_prof3_roa = ib / at if ~missing(ib,at) 

* working capital, current and lagged
qui gen wc = act - lct - che + dlc + txp
qui gen L1wc = L1act - L1lct - L1che + L1dlc + L1txp

qui gen qmj_prof4_cfoa = (ib + dp - (wc - L1wc) - capx) / at if ~missing(ib,dp,wc,L1wc,capx,at) 
qui gen qmj_prof5_gmar = (sale - cogs) / sale if ~missing(sale,cogs) & sale>0
qui gen qmj_prof6_acc = -((wc - L1wc) - dp) / at if ~missing(wc,L1wc,dp,at) 

* kept as in the original script: switches variable abbreviation on for the rest of the session
set varabbrev on

local profvars qmj_prof1_gpoa qmj_prof2_roe qmj_prof3_roa qmj_prof4_cfoa qmj_prof5_gmar qmj_prof6_acc
local j = 0
foreach v of local profvars {
	local j = `j' + 1
	qui bysort yyyymm: egen r = rank(`v') if ~missing(qmj_prof1_gpoa,qmj_prof2_roe,qmj_prof3_roa,qmj_prof4_cfoa,qmj_prof5_gmar,qmj_prof6_acc)
	qui bysort yyyymm: egen m = mean(r)
	qui bysort yyyymm: egen sd = sd(r)
	qui gen qmj_z`j' = (r - m) / sd
	drop r m sd
	}

drop qmj_prof*
	
gen a43_qmj_profitability  = (qmj_z1 + qmj_z2 + qmj_z3 + qmj_z4 + qmj_z5 + qmj_z6) / 6

drop *qmj_z*

* a48: distress measure, copied as-is
generate a48_chs_distress = chs_distress // this might need to be chs_distressQ ??? (not sure about CHS??)

* a52: sales less cogs, SG&A and interest, over positive book equity
generate a52_ffprofitability = (sale - cogs - xsga - xint ) / be if be>0 & be<.

* a53: percentile rank of OrgCat within month and two-digit SIC industry
tempvar pos groupmax
by yyyymm sic2digit, sort: egen `pos' = rank(OrgCat) if OrgCat>0
by yyyymm sic2digit, sort: egen `groupmax' = max(`pos')

generate a53_organizationcapital = (`pos' - 1) / (`groupmax' - 1) if `pos'<.
drop `pos' `groupmax'

* a54-a57: advertising, operating leverage, R&D and tax ratios (each with its own start date)
generate a54_advertising = orig_xad / December_me if December_me>0 & yyyymm>=197306 & !missing(orig_xad, December_me)
generate a55_opleverage = (cogs + xsga) / L1at if !missing(cogs, xsga, L1at)
generate a56_rd = orig_xrd / December_me if December_me>0 & yyyymm>=197306 & !missing(orig_xrd, December_me)
generate a57_tax = orig_txp / ni if ni>0 & yyyymm>=197006 & !missing(orig_txp, ni)
* a58: cash-based operating profitability over assets
generate a58_cbop = (sale - cogs - (xsga - xrd) - (rect - L1rect) - (invt - L1invt) + (ap - L1ap) + (drc + drlt - L1drc - L1drlt) + (xacc - L1xacc) - (xpp - L1xpp)) / at

* keep identifiers, a few raw fields and every variable matching a?_* / a??_*
keep permno yyyymm ffindustry49 me prc siccd sale be ib exchcd retnm a?_* a??_* shareissuance1

compress

sort permno yyyymm

save "~/Documents/CRSP-Compustat/anomalydata.dta", replace


*===============================================================================
* Return-, price-, and volume-based variables
*===============================================================================

use "~/Dropbox/Research/Data/CRSP-Compustat/merged_crsp_compustat.dta", clear

* need both retnm and market equity
keep if !missing(retnm, me)

keep permno yyyymm ffindustry49 dateindex be ib retnm exchcd siccd me r1_1 r12_2 r12_7 r60_13 seasonality seasonalityn marketbeta prc hs_div sharevolume mtw_initiation mtw_omission coskewness divyield

* daily-data measures: distance to 52-week high, Amihud's illiquidity, maximum daily return
sort permno yyyymm
merge 1:1 permno yyyymm using "~/Dropbox/Research/Data/CRSP-Compustat/Computations/dailycrspmeasures.dta", sorted nogenerate keep(master match)

* idiosyncratic volatility estimate
sort permno yyyymm
merge 1:1 permno yyyymm using "~/Dropbox/Research/Data/CRSP-Compustat/Computations/volatility.dta", sorted nogenerate keep(master match) keepusing(idiosyncraticvol)

* Gervais et al. volume innovation
sort permno yyyymm
merge 1:1 permno yyyymm using "~/Dropbox/Research/Data/CRSP-Compustat/Computations/gkmvolume.dta", sorted nogenerate keep(master match)

* firm age: months elapsed since the firm's first observation, counting that month as 1
by permno, sort: egen first_yyyymm = min(yyyymm)
generate long firmage = 12 * (trunc(yyyymm/100) - trunc(first_yyyymm/100)) + (mod(yyyymm,100) - mod(first_yyyymm,100)) + 1
drop first_yyyymm

* momentum-minus-reversal: within-month z-score of r12_2 less that of r60_13
foreach ret of varlist r12_2 r60_13 {
	tempvar mu sigma
	quietly by yyyymm, sort: egen `mu' = mean(`ret')
	quietly by yyyymm, sort: egen `sigma' = sd(`ret')
	quietly generate z`ret' = (`ret' - `mu') / `sigma'
	drop `mu' `sigma'
	}
quietly generate double momreversal = zr12_2 - zr60_13
drop zr12_2 zr60_13

sort permno dateindex

* a70: previous month's distance to high, only when the previous row of the same firm is exactly one month earlier
quietly by permno: generate a70_disttohigh = disttohigh[_n-1] if dateindex-dateindex[_n-1]==1

* remaining predictors are copies of existing columns, listed as new=old
local copies1 "a71_amihud=amihudmeasure a72_marketbeta=marketbeta a73_firmage=firmage a74_idiosyncraticvol=idiosyncraticvol a76_ltrev=r60_13 a77_maxdailyret=maxdailyret a78_momentum=r12_2 a79_intmomentum=r12_7"
local copies2 "a82_strev=r1_1 a83_gkm_voldist=gkm_voldist a84_divmonth=hs_div a85_sharevolume=sharevolume a86_coskewness=coskewness a87_divyield=divyield a88_momreversal=momreversal a89_size=me"

foreach pair of local copies1 {
	gettoken target source : pair, parse("=")
	local source = substr("`source'", 2, .)
	quietly generate `target' = `source'
	}
* price in absolute value; seasonality only where seasonalityn is at least 5
quietly generate a80_nomprice = abs(prc)
quietly generate a81_seasonality = seasonality if seasonalityn>=5
foreach pair of local copies2 {
	gettoken target source : pair, parse("=")
	local source = substr("`source'", 2, .)
	quietly generate `target' = `source'
	}

keep permno yyyymm ffindustry49 me be ib exchcd siccd retnm a*_* 

save "~/Documents/CRSP-Compustat/retbasedvars.dta", replace


* Construct a predictor file similar to the KNS input
* Starts from the monthly return-based file, attaches the QMJ score and the
* June accounting-based anomalies, and carries the accounting variables forward
* to the months that follow each June observation.

use "~/Documents/CRSP-Compustat/retbasedvars.dta", clear

* identifiers plus every variable whose name matches a*_*
keep permno yyyymm a*_* 

merge 1:1 permno yyyymm using "~/Documents/Temp/qmj.dta", nogenerate keep(1 3)
 
* only variables matching a?_* / a??_* are brought in from the anomaly file
merge 1:1 permno yyyymm using "~/Documents/CRSP-Compustat/anomalydata.dta", nogenerate keep(1 3) keepusing(a?_* a??_*)
* adj_shares and avg_dcapx also match the a??_* pattern but are not predictors
drop adj_shares avg_dcapx

* carry accounting-based variables over the next year
gen long dateindex = (trunc(yyyymm/100)-1900)*12 + mod(yyyymm,100)
* howoldrank = number of months since the firm's most recent June row (0 in June itself);
* the replace runs top-down, so each row builds on the value just filled in above it
qui gen howoldrank = 0 if mod(yyyymm,100)==6
sort permno dateindex
qui replace howoldrank = howoldrank[_n-1] + (dateindex-dateindex[_n-1]) if permno==permno[_n-1] & missing(howoldrank) 
	
* fill a missing value from the previous row of the same firm when the row is
* 1-11 months after a June row.
* NOTE: the range a1_accruals-a58_cbop depends on the variable order in the dataset.
foreach var of varlist a1_accruals-a58_cbop {
	sort permno dateindex
	qui replace `var' = `var'[_n-1] if permno==permno[_n-1] & missing(`var') & ~missing(`var'[_n-1]) & howoldrank>0 & howoldrank<=11
	}
drop howoldrank dateindex

* strip the a##_ prefixes: return-, price- and volume-based predictors first ...
rename (a70_disttohigh a71_amihud a72_marketbeta a73_firmage a74_idiosyncraticvol ///
	a76_ltrev a77_maxdailyret a78_momentum a79_intmomentum a80_nomprice ///
	a81_seasonality a82_strev a83_gkm_voldist a84_divmonth a85_sharevolume ///
	a86_coskewness a87_divyield a88_momreversal a89_size) ///
	(disttohigh amihud marketbeta firmage idiosyncraticvol ///
	ltrev maxdailyret momentum intmomentum nomprice ///
	seasonality strev gkm_voldist divmonth sharevolume ///
	coskewness divyield momreversal size)

* ... then the accounting-based predictors
rename (a1_accruals a2_assetgrowth a4_booktomarket a5_cashflowtoequity a6_aturnoverchg ///
	a9_debtissuance a10_earningsprice a13_enterprisemultiple a14_grossprofitability ///
	a15_inventorygrowth a26_herfindahl a21_netoperatingassets a17_piotroskiF ///
	a18_abnormalinvestment a19_leverage a20_accruals_beme a22_netwcap_changes ///
	a24_oscore a25_profitmargin a62_profitability a64_returnonequity ///
	a29_salesgrowth a30_salestoprice a31_shareissuance1 a32_shareissuance5 ///
	a33_sustainablegrowth a35_total_xfin a36_zscore a37_indadjcapxgrowth ///
	a38_salesminusinventory a40_investmenttocapital a41_invgrowthrate ///
	a42_investmenttoassets a43_qmj_profitability a48_chs_distress ///
	a52_ffprofitability a53_organizationcapital a54_advertising a55_opleverage ///
	a56_rd a57_tax a58_cbop) ///
	(accruals assetgrowth booktomarket cashflowtoequity aturnoverchg ///
	debtissuance earningsprice enterprisemultiple grossprofitability ///
	inventorygrowth herfindahl netoperatingassets piotroskiF ///
	abnormalinvestment leverage accruals_beme netwcap_changes ///
	oscore profitmargin profitability returnonequity ///
	salesgrowth salestoprice shareissuance1 shareissuance5 ///
	sustainablegrowth total_xfin zscore indadjcapxgrowth ///
	salesminusinventory investmenttocapital invgrowthrate ///
	investmenttoassets qmj_profitability chs_distress ///
	ffprofitability organizationcapital advertising opleverage ///
	rd tax cbop)

* the predictor file starts in June 1963
drop if yyyymm<196306

sort permno yyyymm

save "~/Documents/CRSP-Compustat/fmpredictors.dta", replace





*===============================================================================
* Compute monthly industry returns
*===============================================================================
* For several industry classifications, computes the me-weighted average of
* retnm per industry and month, plus its one-period lead, and accumulates all
* classifications in a single file keyed by (yyyymm, industryid).
use "~/Dropbox/Research/Data/CRSP-Compustat/merged_crsp_compustat.dta", clear

* display the date range of the input
summ yyyymm

keep permno yyyymm dateindex retnm me siccd hpindustry25 

* bring in Fama-French and Moskowitz-Grinblatt industries
merge m:1 siccd using "~/Dropbox/Research/Data/CRSP-Compustat/Computations/industry_from_siccodes.dta", nogenerate keep(1 3)

* firms with a SIC code strictly between 0 and 9999 but no industry assignment go
* to the highest-numbered industry of each Fama-French classification
replace ffindustry10 = 10 if missing(ffindustry10) & siccd>0 & siccd<9999 & ~missing(siccd)
replace ffindustry17 = 17 if missing(ffindustry17) & siccd>0 & siccd<9999 & ~missing(siccd)
replace ffindustry30 = 30 if missing(ffindustry30) & siccd>0 & siccd<9999 & ~missing(siccd)
replace ffindustry48 = 48 if missing(ffindustry48) & siccd>0 & siccd<9999 & ~missing(siccd)
replace ffindustry49 = 49 if missing(ffindustry49) & siccd>0 & siccd<9999 & ~missing(siccd)

* roll yyyymm forward by one month so that it is stated from the viewpoint of returns
* (December rolls over to January of the following year); dateindex is left unchanged
gen long yyyymm2 = yyyymm + 1 if mod(yyyymm,100)<12
replace  yyyymm2 = 100 * (trunc(yyyymm/100) + 1) + 1 if mod(yyyymm,100)==12
drop yyyymm
ren yyyymm2 yyyymm

keep if ~missing(me,retnm) 
qui gen double me_x_retnm = me * retnm

* j counts the classifications processed so far
local j = 0

foreach inddef in "ffindustry17" "ffindustry30" "mgindustry" "ffindustry48" "ffindustry49" "hpindustry25" {

	preserve
	drop if missing(`inddef')
	* NOTE: me_x_ret below is an abbreviation of me_x_retnm (requires variable abbreviation)
	collapse (sum) me me_x_ret (count) n=me, by(yyyymm dateindex `inddef')
	* me-weighted average return of the industry in this period
	qui gen double `inddef'ret = me_x_ret / me if n>0
	drop me_x_ret me n
	
	xtset `inddef' dateindex

	* the industry's return one period ahead
	qui gen double `inddef'retnm = F1.`inddef'ret
	rename `inddef' industryid
	
	local j = `j'+1
	
	* the first classification creates the output file; later ones are merged
	* into it (all rows from both sides are kept)
	if `j'>1 {
		qui merge 1:1 yyyymm industryid using "~/Dropbox/Research/Data/CRSP-Compustat/Computations/industryreturns.dta", nogenerate
		}		
	qui save "~/Dropbox/Research/Data/CRSP-Compustat/Computations/industryreturns.dta", replace
	restore

	}
	
	
* Daily industry returns: Lme-weighted average of ret per industry and day,
* for several industry classifications, accumulated in one file keyed by
* (date, industryid)
use "~/Dropbox/Research/Data/CRSP-Compustat/simple_dailyreturns.dta", clear

keep permno date Lme ret

* calendar month of each trading day, used to look up the firm's SIC code
generate long yyyymm = 100 * year(date) + month(date)

merge m:1 permno yyyymm using "~/Dropbox/Research/Data/CRSP-Compustat/Computations/siccodes.dta", nogenerate keep(master match)

rename siccd2 siccd

drop if yyyymm<196000

* bring in Fama-French and Moskowitz-Grinblatt industries
merge m:1 siccd using "~/Dropbox/Research/Data/CRSP-Compustat/Computations/industry_from_siccodes.dta", nogenerate keep(master match)

* unassigned firms with a SIC code strictly between 0 and 9999 go to the
* highest-numbered industry of each Fama-French classification
foreach k of numlist 10 17 30 48 49 {
	replace ffindustry`k' = `k' if missing(ffindustry`k') & siccd>0 & siccd<9999
	}

drop yyyymm siccd

keep if !missing(Lme, ret)
quietly generate double Lme_x_ret = Lme * ret
drop ret

* the first classification creates the output file, later ones are merged into it
local isfirst = 1

foreach inddef in "ffindustry17" "ffindustry30" "ffindustry49" "ffindustry48" "mgindustry" {

	preserve
	keep if !missing(`inddef')
	collapse (sum) Lme Lme_x_ret (count) n=Lme, by(date `inddef')
	* weighted average return of the industry on this day
	quietly generate double `inddef'ret = Lme_x_ret / Lme if n>0
	drop Lme_x_ret Lme n

	generate long yyyymmdd = 10000 * year(date) + 100 * month(date) + day(date)

	rename `inddef' industryid

	if !`isfirst' {
		quietly merge 1:1 date industryid using "~/Dropbox/Research/Data/CRSP-Compustat/Computations/industryreturns_daily.dta", nogenerate
		}
	local isfirst = 0
	quietly save "~/Dropbox/Research/Data/CRSP-Compustat/Computations/industryreturns_daily.dta", replace
	restore

	}
	
	
		
* create wide versions of MG industries
* One row per period and one column mg_<id> per industry, for industry ids up to 20.
*
* FIX: the monthly file used to be reshaped with every industryid present and
* then trimmed with "drop mg_21-mg_49", which depends on the variable order and
* on both mg_21 and mg_49 existing.  It now filters on industryid before the
* reshape, exactly as the daily file already did.

* monthly
use "~/Dropbox/Research/Data/CRSP-Compustat/Computations/industryreturns.dta", clear
keep industryid yyyymm mgindustryret
rename mgindustryret mg_

keep if industryid<=20

reshape wide mg_, i(yyyymm) j(industryid)

save "~/Dropbox/Research/Data/CRSP-Compustat/Computations/mgindustryreturns_monthly.dta", replace

* daily
use "~/Dropbox/Research/Data/CRSP-Compustat/Computations/industryreturns_daily.dta", clear
keep industryid yyyymmdd mgindustryret
rename mgindustryret mg_

keep if industryid<=20

reshape wide mg_, i(yyyymmdd) j(industryid)

save "~/Dropbox/Research/Data/CRSP-Compustat/Computations/mgindustryreturns_daily.dta", replace





	

*===============================================================================
* 4. Create the factors
*===============================================================================

* Assemble the characteristics file: predictors plus return, exchange, size
* and the return of the firm's Fama-French 49 industry

use "~/Documents/CRSP-Compustat/fmpredictors.dta", clear

* no keep() option here: rows found on either side of the merge are retained
merge 1:1 permno yyyymm using "~/Dropbox/Research/Data/CRSP-Compustat/merged_crsp_compustat.dta", nogenerate keepusing(dateindex retnm exchcd siccd me)

* Fama-French 49 industry from the SIC code; unassigned firms with a SIC code
* strictly between 0 and 9999 go to industry 49
merge m:1 siccd using "~/Dropbox/Research/Data/CRSP-Compustat/Computations/industry_from_siccodes.dta", nogenerate keep(master match) keepusing(ffindustry49)
replace ffindustry49 = 49 if missing(ffindustry49) & siccd>0 & siccd<9999
drop siccd

* look up the industry's ffindustry49retnm and store it as iretnm
generate industryid = ffindustry49
merge m:1 industryid yyyymm using "~/Dropbox/Research/Data/CRSP-Compustat/Computations/industryreturns.dta", nogenerate keep(master match) keepusing(ffindustry49retnm)
drop industryid
rename ffindustry49retnm iretnm

drop if yyyymm<196306

save "~/Documents/CRSP-Compustat/FMcharacteristics.dta", replace




* Switches for the factor loop.
*   bDoDaily = 1 : also compute daily factor returns
*   bDoFP    = 1 : rank-based (Frazzini-Pedersen style) weights for w and win;
*                  otherwise both are set to 1
local bDoDaily=1
local bDoFP=0

* Position in the predictor list at which processing starts. Earlier predictors
* are echoed and skipped. With a value above 1 the loop appends to the factor
* files left by a previous run, so those files must already contain the skipped
* factors (the final section of this file selects several of them by name).
* Set to 1 to rebuild the factor files from scratch.
local firstvar = 25

* Predictors whose portfolios are formed from the June cross-section only
local annualvars size qmj accruals assetgrowth booktomarket cashflowtoequity aturnoverchg debtissuance earningsprice enterprisemultiple grossprofitability inventorygrowth herfindahl netoperatingassets piotroskiF abnormalinvestment leverage accruals_beme netwcap_changes oscore profitmargin profitability returnonequity salesgrowth salestoprice shareissuance1 shareissuance5 sustainablegrowth total_xfin zscore indadjcapxgrowth salesminusinventory investmenttocapital invgrowthrate investmenttoassets qmj_profitability chs_distress ffprofitability organizationcapital advertising opleverage rd tax cbop

local varctr = 0

foreach var in size disttohigh amihud marketbeta firmage idiosyncraticvol ltrev maxdailyret momentum intmomentum nomprice seasonality strev gkm_voldist divmonth sharevolume coskewness divyield momreversal qmj accruals assetgrowth booktomarket cashflowtoequity aturnoverchg debtissuance earningsprice enterprisemultiple grossprofitability inventorygrowth herfindahl netoperatingassets piotroskiF abnormalinvestment leverage accruals_beme netwcap_changes oscore profitmargin profitability returnonequity salesgrowth salestoprice shareissuance1 shareissuance5 sustainablegrowth total_xfin zscore indadjcapxgrowth salesminusinventory investmenttocapital invgrowthrate investmenttoassets qmj_profitability chs_distress ffprofitability organizationcapital advertising opleverage rd tax cbop {

	local varctr = `varctr' + 1

	disp("Variable `varctr': `var'")
	
	* skip predictors that come before the configured starting position
	if `varctr'<`firstvar' {
		continue
	}
	
	use "~/Documents/CRSP-Compustat/FMcharacteristics.dta", clear

	* The size factor is built on the book-to-market sort: the predictor is
	* swapped here and sizeoverload tells the later steps to form
	* small-minus-big instead of high-minus-low.
	if "`var'"=="size" {
		local var = "booktomarket"
		local sizeoverload = 1
		}
	else {
		local sizeoverload = 0
		}
		
	keep permno yyyymm dateindex me exchcd `var' retnm iretnm ffindustry49
	
	* from here on the predictor is always called z
	rename `var' z

	
	* Is this factor rebalanced annually?
	* Macro-list membership test: 1 if the predictor is in annualvars, else 0.
	local b_annual : list var in annualvars

	* Annual factors are sorted on June rows only. The full panel is preserved
	* so the June assignments can be carried to the following months later.
	if `b_annual'==1 {
		preserve
		qui keep if mod(yyyymm,100)==6
		}
				
	qui drop if missing(me,retnm,z)
	
	* == STANDARD SORT, TREATING THE PREDICTOR AS CONTINUOUS
	
	* size bucket: 1 at or below the monthly NYSE median of me, 2 above it
	qui gen nyse_me = me if exchcd==1
	bysort yyyymm: egen p50 = pctile(nyse_me), p(50)
	qui gen Qme = cond(me<=p50, 1, 2) if ~missing(me,p50)
	drop nyse_me p50 
	
	* predictor bucket: monthly NYSE 30th and 70th percentiles of z
	qui gen nyse_z = z if exchcd==1
	bysort yyyymm: egen p30 = pctile(nyse_z), p(30)
	bysort yyyymm: egen p70 = pctile(nyse_z), p(70)
	
	qui gen     Qsortvar = 1 if z <= p30 & ~missing(z,p30)
	qui replace Qsortvar = 2 if z > p30 & z <= p70 & ~missing(z,p30,p70)
	qui replace Qsortvar = 3 if z > p70 & ~missing(z,p70)

	drop nyse_z p30 p70

	* monthly extremes of z, used to detect categorical predictors below
	bysort yyyymm: egen mmin = min(z)
	bysort yyyymm: egen mmax = max(z)

	* Observations at the monthly maximum always go to the top bucket.
	* 'age' creates a problem because so many firms trace back to the same date
	qui replace Qsortvar = 3 if z==mmax 
	
	* == EXCEPTIONS FOR THE DISCRETE CASES
	
	* dtype classifies the predictor month by month:
	* 1) no value strictly between the monthly min and max (e.g. -1, 1)
	* 2) all values strictly between min and max are identical (e.g. -1, 0, 1)
	* 3) anything else, treated as continuous

	qui gen mid_z = z if z>mmin & z<mmax
	bysort yyyymm: egen nmid = count(mid_z)
	
	qui bysort yyyymm: egen m2min = min(mid_z)
	qui bysort yyyymm: egen m2max = max(mid_z)

	qui gen     dtype = cond(m2min==m2max, 2, 3) if nmid>0 & ~missing(nmid)
	qui replace dtype = 1 if nmid==0
	
	* for the discrete types the percentile buckets are discarded and the
	* buckets are rebuilt from the values themselves: min, interior, max
	qui replace Qsortvar = . if inlist(dtype,1,2)

	qui replace Qsortvar = 1 if inlist(dtype,1,2) & z==mmin
	qui replace Qsortvar = 2 if dtype==2 & z>mmin & z<mmax
	qui replace Qsortvar = 3 if inlist(dtype,1,2) & z==mmax
			
	drop mmin mmax mid_z nmid m2min m2max

	* compute Frazzini-Pedersen weights
	* w is the monthly rank, scaled by (number of ranks + 1) and centred on its
	* monthly mean, divided by the monthly sum of absolute deviations
	if `bDoFP'==1 {
		* rank on the predictor, or on market cap when building the size factor
		local fpkey z
		if `sizeoverload'~=0 {
			local fpkey me
			}
		qui bysort yyyymm: egen rk = rank(`fpkey')
		qui bysort yyyymm: egen rkmax = max(rk)
		qui gen rkscaled = rk / (rkmax + 1)
		qui bysort yyyymm: egen rkmean = mean(rkscaled)
		qui gen rkdev = abs(rkscaled - rkmean)
		qui bysort yyyymm: egen rkdevsum = sum(rkdev)
		qui gen double w = (rkscaled - rkmean) / rkdevsum
		drop rk rkmax rkscaled rkmean rkdev rkdevsum
		}
	
	* industry-neutral predictor

	* subtract the month-by-industry mean of z
	* (z_in is missing when the industry code is missing)
	qui bysort yyyymm ffindustry49: egen indmean = mean(z) if ~missing(ffindustry49)
	qui gen z_in = z - indmean
	drop indmean
	
	* monthly NYSE 30th and 70th percentiles of the demeaned predictor
	qui gen nyse_z = z_in if exchcd==1	
	qui bysort yyyymm: egen p30 = pctile(nyse_z), p(30)
	qui bysort yyyymm: egen p70 = pctile(nyse_z), p(70)
	
	qui gen     inQsortvar = 1 if z_in <= p30 & ~missing(z_in,p30)
	qui replace inQsortvar = 2 if z_in > p30 & z_in <= p70 & ~missing(z_in,p30,p70)
	qui replace inQsortvar = 3 if z_in > p70 & ~missing(z_in,p70)

	drop nyse_z p30 p70
	
	if `sizeoverload'~=1 {
		* ordinary predictor: reuse the standard size bucket wherever the
		* industry-neutral predictor bucket exists
		qui gen inQme = Qme if ~missing(inQsortvar)
		}
	else {
		* size factor: market cap is demeaned within month and industry too,
		* and split at the monthly NYSE median of the demeaned value
		qui gen me2 = me
		qui bysort yyyymm ffindustry49: egen m = mean(me2) if ~missing(ffindustry49)
		qui replace me2 = me2 - m
		
		qui gen nyse_me = me2 if exchcd==1 & ~missing(ffindustry49)
		
		bysort yyyymm: egen p50 = pctile(nyse_me), p(50)
		
		qui gen inQme = cond(me2<=p50, 1, 2) if ~missing(me2,p50)
		}
	
	drop exchcd	
	
	* compute Frazzini-Pedersen weights
	if `bDoFP'~=1 {
		* rank weighting switched off: both weight variables are set to 1
		qui gen w = 1
		qui gen win = 1
		}
	else {
		* same construction as for w, applied to the industry-demeaned
		* predictor (or to demeaned market cap for the size factor)
		if `sizeoverload'~=0 {
			qui bysort yyyymm: egen r = rank(me2)
			}
		else {
			qui bysort yyyymm: egen r = rank(z_in)
			}
		qui bysort yyyymm: egen n = max(r)
		qui gen rc = r / (n + 1)
		qui bysort yyyymm: egen mrc = mean(rc)
		qui gen absdev = abs(rc - mrc)
		qui bysort yyyymm: egen sumabsdev = sum(absdev)
		qui gen double win = (rc - mrc) / sumabsdev
		}
		
	* If categorical predictors, override industry-neutrality
	qui replace inQsortvar = Qsortvar if inlist(dtype,1,2)
		
	* keep the mean of dtype in a local; it is written to the factor file later
	qui summ dtype
	local dtype = `r(mean)'
	drop dtype
		
	if `b_annual'==1 {
		
		* store the June assignments, return to the full panel, attach them
		keep permno yyyymm Qme Qsortvar inQme inQsortvar w win
		
		qui save "~/Documents/CRSP-Compustat/assignments.dta", replace
		
		restore
		
		qui merge 1:1 permno yyyymm using "~/Documents/CRSP-Compustat/assignments.dta", nogenerate 
		
		* howoldrank is 0 on June rows and otherwise the distance, in dateindex
		* units, accumulated from the same firm's preceding rows
		qui gen howoldrank = 0 if mod(yyyymm,100)==6
		qui bysort permno (dateindex): replace howoldrank = howoldrank[_n-1] + (dateindex-dateindex[_n-1]) if _n>1 & missing(howoldrank) 
				
		* copy each assignment down within firm, row by row, for at most 11
		* dateindex units after the June row it came from
		foreach copyvar of varlist Qsortvar Qme inQsortvar inQme w win {
			qui bysort permno (dateindex): replace `copyvar' = `copyvar'[_n-1] if _n>1 & missing(`copyvar') & ~missing(`copyvar'[_n-1]) & howoldrank>0 & howoldrank<=11
			}
		drop howoldrank
		
		disp("Annual")
		
		}		
		
	keep permno me retnm iretnm yyyymm Qme Qsortvar inQme inQsortvar w win
	
	qui drop if missing(Qme,Qsortvar,me,retnm)
	
	* firm-month assignments, reused by the daily step further down
	qui save "~/Documents/Temp/portfolioassignments.dta", replace
	
	gen double me_x_retnm = me * retnm 
	
	* Standard factors
	* (done on a copy of the data; the firm-level rows are restored afterwards)
	preserve

	gen double w_x_retnm = w * retnm 
	
	collapse (sum) me_x_retnm me w_x_retnm, by(yyyymm Qme Qsortvar) fast
	
	qui gen double vwret = me_x_retnm / me if me>0
		
	* portfolio number 1-6: size bucket is the slow index, predictor bucket the fast one
	qui gen FFport = 3 * (Qme - 1) + Qsortvar if inlist(Qme,1,2) & inlist(Qsortvar,1,2,3)
	
	qui drop if missing(FFport)
	
	keep yyyymm FFport vwret w_x_retnm

	qui reshape wide vwret w_x_retnm, i(yyyymm) j(FFport)	

	if `sizeoverload'~=0 {
		* size: average of the three small portfolios minus average of the three big ones
		qui gen double factorret  = (1/3) * (vwret1 + vwret2 + vwret3) - (1/3) * (vwret4 + vwret5 + vwret6)
		}
	else {
		* top minus bottom predictor bucket, averaged over small and big,
		* and separately within small and within big
		qui gen double factorret = (1/2) * (vwret3 + vwret6) - (1/2) * (vwret1 + vwret4)		
		qui gen double factorret_small = vwret3 - vwret1		
		qui gen double factorret_big = vwret6 - vwret4	
		}
	
	* weighted-return version: sum of w x return over all six portfolios
	qui egen fp_factorret = rowtotal(w_x_retnm?)
	
	keep yyyymm factorret* fp_factorret
	
	qui save "~/Documents/CRSP-Compustat/tmp_factor1.dta", replace
	
	restore
		
	* INDUSTRY-NEUTRAL FACTORS
	* Suffix _in: portfolios from the industry-neutral sort, raw returns.
	* Suffix _inh: same portfolios, returns net of the industry return.
	drop Qme Qsortvar
	
	qui drop if missing(inQme,inQsortvar,iretnm)
	
	gen double me_x_iretnm = me * (retnm - iretnm) 
	
	gen double win_x_retnm = win * retnm 
	gen double win_x_iretnm = win * (retnm - iretnm) 
	
	collapse (sum) me_x_retnm me_x_iretnm me win_x_retnm win_x_iretnm, by(yyyymm inQme inQsortvar) fast
	
	qui gen double vwret = me_x_retnm / me if me>0
	qui gen double vwreti = me_x_iretnm / me if me>0
		
	* portfolio number 1-6: size bucket is the slow index, predictor bucket the fast one
	qui gen FFport = 3 * (inQme - 1) + inQsortvar if inlist(inQme,1,2) & inlist(inQsortvar,1,2,3)
	
	qui drop if missing(FFport)
	
	keep yyyymm FFport vwret vwreti win_x_retnm win_x_iretnm

	qui reshape wide vwret vwreti win_x_retnm win_x_iretnm, i(yyyymm) j(FFport)	

	if `sizeoverload'~=0 {
		* size: average of the small portfolios minus average of the big ones
		qui gen double factorret_in  = (1/3) * (vwret1 + vwret2 + vwret3) - (1/3) * (vwret4 + vwret5 + vwret6)
		qui gen double factorret_inh = (1/3) * (vwreti1 + vwreti2 + vwreti3) - (1/3) * (vwreti4 + vwreti5 + vwreti6)
		}
	else {
		* top minus bottom predictor bucket: overall, within small, within big
		qui gen double factorret_in  = (1/2) * (vwret3 + vwret6) - (1/2) * (vwret1 + vwret4)		
		qui gen double factorret_inh = (1/2) * (vwreti3 + vwreti6) - (1/2) * (vwreti1 + vwreti4)		
		qui gen double factorret_small_in = vwret3 - vwret1		
		qui gen double factorret_small_inh = vwreti3 - vwreti1		
		qui gen double factorret_big_in = vwret6 - vwret4	
		qui gen double factorret_big_inh = vwreti6 - vwreti4	
		}
	
	qui egen fp_factorret_in = rowtotal(win_x_retnm?)
	qui egen fp_factorret_inh = rowtotal(win_x_iretnm?)
	
	keep yyyymm factorret*_in factorret*_inh fp_factorret_in fp_factorret_inh
	
	* put the standard factors from the previous step next to these
	qui merge 1:1 yyyymm using "~/Documents/CRSP-Compustat/tmp_factor1.dta", nogenerate 
	
	* Add information
	gen factorid = `varctr'

	* the size factor is computed with var set to booktomarket, so its name is
	* fixed by hand
	local thisname "`var'"
	if `sizeoverload'~=0 {
		local thisname "size"
		}
	gen factorname = "`thisname'"
		
	* roll yyyymm forward	
	* (the returns used above are retnm, so the factor is dated one month after
	* the sort month; December rolls into January of the next year)
	qui gen long yyyymm_nm = yyyymm + 1 if mod(yyyymm,100)<12
	qui replace  yyyymm_nm = (trunc(yyyymm/100) + 1) * 100 + 1 if mod(yyyymm,100)==12
	drop yyyymm
	rename yyyymm_nm yyyymm
	
	order factorid factorname yyyymm factorret* 
	
	gen byte dtype = `dtype' 
	
	* Stack onto the factors saved so far. Rows already on file under the same
	* factor name (left by an earlier run) are replaced rather than duplicated:
	* duplicates would make the later reshape of this file by month and factor
	* fail. isnew marks the rows computed in this pass.
	if `varctr'>1 {
		gen byte isnew = 1
		qui append using "~/Dropbox/Research/Data/CRSP-Compustat/Computations/fmfactors.dta"
		qui drop if factorname=="`thisname'" & missing(isnew)
		drop isnew
		}
	qui save "~/Dropbox/Research/Data/CRSP-Compustat/Computations/fmfactors.dta", replace
	
	* COMPUTE DAILY FACTOR RETURNS
	if `bDoDaily'==1 {
		
		* Start from the file with daily returns, and merge in portfolio sort information
		* NOTE(review): the assignments are matched on the yyyymm stored in the
		* daily file, while the monthly factors above are dated one month after
		* the sort month. Confirm that yyyymm in the daily file already refers
		* to the sort month; otherwise daily returns are paired with portfolios
		* formed at the end of the same month.
		use "~/Dropbox/Research/Data/CRSP-Compustat/simple_dailyreturns.dta", clear

		keep permno date yyyymm Lme ret
		
		* Lme plays the role of the portfolio weight from here on
		rename Lme me
	
		* keep(3): only matched stock-days are needed. Unmatched daily rows have
		* no portfolio and unmatched assignment rows have no date or return, so
		* both kinds were discarded further down anyway; dropping them here
		* keeps them out of the collapse and reshape steps.
		qui merge m:1 permno yyyymm using "~/Documents/Temp/portfolioassignments.dta", nogenerate keep(3) keepusing(Qme Qsortvar inQme inQsortvar)
		drop yyyymm
		
		qui gen double me_x_ret = me * ret 
		
		* standard factor first, on a copy of the data
		preserve
			
		drop inQme inQsortvar
		
		collapse (sum) me_x_ret me, by(date Qme Qsortvar) fast
		
		qui gen double vwret = me_x_ret / me if me>0
		
		* portfolio number 1-6 from size bucket and predictor bucket
		qui gen     FFport = 1 if Qme==1 & Qsortvar==1	
		qui replace FFport = 2 if Qme==1 & Qsortvar==2	
		qui replace FFport = 3 if Qme==1 & Qsortvar==3	
		qui replace FFport = 4 if Qme==2 & Qsortvar==1	
		qui replace FFport = 5 if Qme==2 & Qsortvar==2	
		qui replace FFport = 6 if Qme==2 & Qsortvar==3	
		
		qui drop if missing(FFport)
		
		keep date FFport vwret

		qui reshape wide vwret, i(date) j(FFport)	

		* same factor definitions as in the monthly step
		if `sizeoverload'==0 {
			qui gen double factorret = (1/2) * (vwret3 + vwret6) - (1/2) * (vwret1 + vwret4)		
			}
		else {
			qui gen double factorret  = (1/3) * (vwret1 + vwret2 + vwret3) - (1/3) * (vwret4 + vwret5 + vwret6)
			}
		
		keep date factorret
		qui drop if missing(factorret)
		
		qui save "~/Documents/CRSP-Compustat/tmp_factor1.dta", replace
		
		restore
		
			
		* Industry-neutral factors
		* (same steps as for the standard daily factor, on the industry-neutral buckets)
		drop Qme Qsortvar
		
		qui drop if missing(inQme,inQsortvar)
			
		collapse (sum) me_x_ret me, by(date inQme inQsortvar) fast
		
		qui gen double vwret = me_x_ret / me if me>0
			
		* portfolio number 1-6: size bucket is the slow index, predictor bucket the fast one
		qui gen FFport = 3 * (inQme - 1) + inQsortvar if inlist(inQme,1,2) & inlist(inQsortvar,1,2,3)
		
		qui drop if missing(FFport)
		
		keep date FFport vwret

		qui reshape wide vwret, i(date) j(FFport)	

		if `sizeoverload'~=0 {
			* size: average of the small portfolios minus average of the big ones
			qui gen double factorret_in  = (1/3) * (vwret1 + vwret2 + vwret3) - (1/3) * (vwret4 + vwret5 + vwret6)
			}
		else {
			* top minus bottom predictor bucket, averaged over small and big
			qui gen double factorret_in = (1/2) * (vwret3 + vwret6) - (1/2) * (vwret1 + vwret4)		
			}
		
		keep date factorret_in
		qui drop if missing(factorret_in)
		
		* put the standard daily factor next to the industry-neutral one
		qui merge 1:1 date using "~/Documents/CRSP-Compustat/tmp_factor1.dta", nogenerate 
		
		
		* Add information
		gen factorid = `varctr'

		* the size factor is computed with var set to booktomarket, so its name
		* is fixed by hand
		local thisname "`var'"
		if `sizeoverload'~=0 {
			local thisname "size"
			}
		gen factorname = "`thisname'"
			
		order factorid factorname date factorret factorret_in
		
		* Stack onto the daily factors saved so far. Rows already on file under
		* the same factor name (left by an earlier run) are replaced rather than
		* duplicated: duplicates would make the later reshape of this file by
		* date and factor fail. isnew marks the rows computed in this pass.
		if `varctr'>1 {
			gen byte isnew = 1
			qui append using "~/Dropbox/Research/Data/CRSP-Compustat/Computations/fmfactors_daily.dta"
			qui drop if factorname=="`thisname'" & missing(isnew)
			drop isnew
			}
		qui save "~/Dropbox/Research/Data/CRSP-Compustat/Computations/fmfactors_daily.dta", replace
	
		}
	}

	
* Save the small set of monthly and daily factors

* Monthly

use "~/Dropbox/Research/Data/CRSP-Compustat/Computations/fmfactors.dta", clear

* number the eight factors that are kept, 2 through 9, in the order listed
gen long factorno = .
local k = 1
foreach f in size booktomarket ffprofitability assetgrowth ltrev idiosyncraticvol qmj marketbeta {
	local ++k
	replace factorno = `k' if factorname=="`f'"
	}
		
drop if missing(factorno)
		
keep yyyymm factorno factorret

* one row per month, one column per kept factor
reshape wide factorret, i(yyyymm) j(factorno)
		
* factorret2 through factorret9 get their final names, in the same order
local k = 1
foreach nm in smb2 hml2 rmw cma ltrev rvar qmj bab {
	local ++k
	rename factorret`k' `nm'
	}

merge 1:1 yyyymm using "~/Dropbox/Research/Anomalies/Fama-French factors/fffactors.dta", nogenerate keepusing(mktrf smb hml rf)

* verify
* (own size and value factors against the ones from the Fama-French file)
corr smb smb2 hml hml2

* the own versions replace the merged-in smb and hml
drop smb hml
rename smb2 smb
rename hml2 hml

save "~/Dropbox/Research/Factor Momentum 2/Data/Factor and Industry Data/clean/fm_monthlyfactors3.dta", replace

* Daily

use "~/Dropbox/Research/Data/CRSP-Compustat/Computations/fmfactors_daily.dta", clear
drop if missing(date)

* number the eight factors that are kept, 2 through 9, in the order listed
gen long factorno = .
local k = 1
foreach f in size booktomarket ffprofitability assetgrowth ltrev idiosyncraticvol qmj marketbeta {
	local ++k
	replace factorno = `k' if factorname=="`f'"
	}
		
drop if missing(factorno)
		
keep date factorno factorret

* one row per day, one column per kept factor
reshape wide factorret, i(date) j(factorno)
		
* factorret2 through factorret9 get their final names, in the same order
local k = 1
foreach nm in smb2 hml2 rmw cma ltrev rvar qmj bab {
	local ++k
	rename factorret`k' `nm'
	}

* only days present in the factor file are kept
merge 1:1 date using "~/Dropbox/Research/Anomalies/Fama-French factors/daily_fffactors.dta", nogenerate keepusing(mktrf smb hml rf) keep(1 3)

* verify
* (own size and value factors against the ones from the Fama-French file)
corr smb smb2 hml hml2

* the own versions replace the merged-in smb and hml
drop smb hml
rename smb2 smb
rename hml2 hml

gen long yyyymmdd = 10000 * year(date) + 100 * month(date) + day(date)

save "~/Dropbox/Research/Factor Momentum 2/Data/Factor and Industry Data/clean/fm_dailyfactors3.dta", replace


